This is the visualisation script for the greenhouse gas data product, which was used in the QA/QC process and review of the data package. 1. It takes the data file on EDI and combines it with the L1 file, which has already been QAQCed. 3. It lists the flag frequency to check whether there are any NAs or any observations assigned the wrong flag. 5. It creates plots to visualise all variables and sites. 6. It writes the new combined data to a new csv.

This section checks to make sure each observation has a data flag. It also checks to make sure the frequency of each flag matches what we expect to see.

# Make sure there are no NAs in the Flag columns.
# DateTime is kept alongside the flag columns so that any row with a missing
# value can be traced back to its timestamp.
Flags <- current_df |>
  select(DateTime, starts_with("Flag"))

# Keep only the INCOMPLETE rows, i.e. rows with at least one NA in DateTime
# or in any flag column. An empty RowsNA means nothing is missing.
RowsNA <- Flags[!complete.cases(Flags), ]

# Check the flag columns on their own (DateTime dropped)
Flags <- current_df |>
  select(starts_with("Flag"))

# Print a table with the number of times each flag value was used, one table
# per flag column. useNA = "always" adds an <NA> count even when it is zero.
# seq_len() is used instead of 1:ncol() so the loop body is skipped (rather
# than run on columns 1 and 0) if no flag columns were selected.
for (f in seq_len(ncol(Flags))) {
  # print(colnames(Flags[f]))
  print(table(Flags[, f], useNA = "always"))
}
## 
##    0    1 <NA> 
## 5322 2630    0 
## 
##    0    1    2    3    4    6    7 <NA> 
## 5509  624  398  387  793   53   15  173 
## 
##    0    1    2    3    4    6    7 <NA> 
## 5830  741  329  159  470  159   18  246

Check to make sure that what is in the maintenance log was actually removed

Look at the last rows of the maintenance log

We want to make sure that our maintenance log actually worked and removed or changed the values it was supposed to.

## Rows: 9 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Reservoir, DataStream, TIMESTAMP_start, TIMESTAMP_end, start_parame...
## dbl (5): Site, Depth, Reps, vial_number, flag
## lgl (1): update_value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 13
##   Reservoir  Site Depth  Reps DataStream TIMESTAMP_start         TIMESTAMP_end  
##   <chr>     <dbl> <dbl> <dbl> <chr>      <chr>                   <chr>          
## 1 FCR        50     3.8    NA GHG        2024-04-15 09:53:00 EDT 2024-04-15 10:…
## 2 BVR        50     3      NA GHG        2024-05-06 10:32:00 EDT 2024-05-06 11:…
## 3 FCR        50     5      NA GHG        2024-05-31 11:42:00 EDT 2024-05-31 12:…
## 4 FCR         1.1   0.1    NA GHG        2024-08-26 10:56:00 EDT 2024-08-26 11:…
## 5 FCR       200     0.1    NA GHG        2024-09-02 13:15:00 EDT 2024-09-02 14:…
## 6 FCR        50     8      NA GHG        2024-12-03 10:52:00 EDT 2024-12-03 10:…
## # ℹ 6 more variables: start_parameter <chr>, end_parameter <chr>,
## #   vial_number <dbl>, flag <dbl>, update_value <lgl>, notes <chr>
Reservoir Site Depth Reps DataStream TIMESTAMP_start TIMESTAMP_end start_parameter end_parameter vial_number flag update_value notes
FCR 50.0 3.8 NA GHG 2024-04-15 09:53:00 EDT 2024-04-15 10:53:00 EDT CO2_umolL NA 328 1 NA Moisture interfered with CO2 peak and not usable
BVR 50.0 3.0 NA GHG 2024-05-06 10:32:00 EDT 2024-05-06 11:32:00 EDT CO2_umolL NA 271 1 NA Moisture interfered with CO2 peak
FCR 50.0 5.0 NA GHG 2024-05-31 11:42:00 EDT 2024-05-31 12:42:00 EDT CO2_umolL NA 21 1 NA Moisture interfered with CO2 peak
FCR 1.1 0.1 NA GHG 2024-08-26 10:56:00 EDT 2024-08-26 11:56:00 EDT CO2_umolL NA 97 1 NA Moisture interfered with CO2 peak
FCR 200.0 0.1 NA GHG 2024-09-02 13:15:00 EDT 2024-09-02 14:15:00 EDT CO2_umolL NA 345 1 NA Moisture interfered with CO2 peak
FCR 50.0 8.0 NA GHG 2024-12-03 10:52:00 EDT 2024-12-03 10:54:00 EDT CO2_umolL NA 215 1 NA Very end of CO2 peak was cut off

Check that the columns have flags

Look at the first few rows of the data frame and check that the observations after the TIMESTAMP_start are flagged

Look at the first 5 rows for that time

##                 DateTime CO2_umolL Flag_CO2_umolL
## 7947 2024-12-03 10:53:00        NA              1
## 7948 2024-12-03 10:53:00   67.0538              4
##                 DateTime CO2_umolL Flag_CO2_umolL
## 7947 2024-12-03 10:53:00        NA              1
## 7948 2024-12-03 10:53:00   67.0538              4
# FCR, Site 50: CH4 through time, coloured by sampling depth.
# Only rows with Depth_m below 100 are kept, and Depth_m is turned into a
# factor so that each depth is drawn in its own discrete colour.
current_df |>
  filter(Reservoir == "FCR", Site == 50, Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Depth_m)) +
  labs(title = "FCR CH4") +
  geom_point()
## Warning: Removed 361 rows containing missing values or values outside the scale range
## (`geom_point()`).

# FCR, Site 50: CO2 through time, coloured by sampling depth.
# Only rows with Depth_m below 100 are kept, and Depth_m is turned into a
# factor so that each depth is drawn in its own discrete colour.
current_df |>
  filter(Reservoir == "FCR", Site == 50, Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Depth_m)) +
  labs(title = "FCR CO2") +
  geom_point()
## Warning: Removed 446 rows containing missing values or values outside the scale range
## (`geom_point()`).

# BVR, Site 50: CH4 through time, coloured by sampling depth.
# Only rows with Depth_m below 100 are kept, and Depth_m is turned into a
# factor so that each depth is drawn in its own discrete colour.
current_df |>
  filter(Reservoir == "BVR", Site == 50, Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Depth_m)) +
  labs(title = "BVR CH4") +
  geom_point()
## Warning: Removed 168 rows containing missing values or values outside the scale range
## (`geom_point()`).

# BVR, Site 50: CO2 through time, coloured by sampling depth.
# Only rows with Depth_m below 100 are kept, and Depth_m is turned into a
# factor so that each depth is drawn in its own discrete colour.
current_df |>
  filter(Reservoir == "BVR", Site == 50, Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Depth_m)) +
  labs(title = "BVR CO2") +
  geom_point()
## Warning: Removed 195 rows containing missing values or values outside the scale range
## (`geom_point()`).

# FCR, every site except Site 50: CH4 through time, one colour per site.
# No depth filter is applied here.
current_df |>
  filter(Site != 50, Reservoir == "FCR") |>
  ggplot(aes(DateTime, CH4_umolL, colour = as.factor(Site))) +
  labs(title = "FCR CH4 other sites") +
  geom_point()
## Warning: Removed 80 rows containing missing values or values outside the scale range
## (`geom_point()`).

# FCR, every site except Site 50: CO2 through time, one colour per site.
# No depth filter is applied here.
current_df |>
  filter(Site != 50, Reservoir == "FCR") |>
  ggplot(aes(DateTime, CO2_umolL, colour = as.factor(Site))) +
  labs(title = "FCR CO2 other sites") +
  geom_point()
## Warning: Removed 86 rows containing missing values or values outside the scale range
## (`geom_point()`).

# BVR NOT Site 50 - CH4
# CH4 through time for every BVR site except Site 50, one colour per site.
# Uses the native pipe throughout, like the other plots in this script,
# instead of mixing %>% and |> in a single chain.
current_df |>
  filter(Reservoir == "BVR", Site != 50) |>
  ggplot(aes(x = DateTime, y = CH4_umolL, colour = as.factor(Site))) +
  geom_point() +
  labs(title = "BVR CH4 other sites")
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`geom_point()`).

# BVR NOT Site 50 - CO2
# CO2 through time for every BVR site except Site 50, one colour per site.
# Uses the native pipe throughout, like the other plots in this script,
# instead of mixing %>% and |> in a single chain.
current_df |>
  filter(Reservoir == "BVR", Site != 50) |>
  ggplot(aes(x = DateTime, y = CO2_umolL, colour = as.factor(Site))) +
  geom_point() +
  labs(title = "BVR CO2 other sites")
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

Look at this year only:

# CH4 plots current year
# a: FCR sites other than Site 50, observations from 2024-01-01 onward with
# Depth_m below 100, coloured by site. Site becomes a factor so each site is
# drawn in its own discrete colour.
a <- current_df |>
  filter(
    Reservoir == "FCR",
    Site != 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Site = as.factor(Site)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Site)) +
  labs(title = "FCR CH4") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(a)
# b: FCR Site 50 CH4, observations from 2024-01-01 onward with Depth_m below
# 100, coloured by depth. Depth_m becomes a factor so each depth is drawn in
# its own discrete colour.
b <- current_df |>
  filter(
    Reservoir == "FCR",
    Site == 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Depth_m)) +
  labs(title = "FCR Site 50, CH4") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(b)
# c: BVR sites other than Site 50, CH4, observations from 2024-01-01 onward
# with Depth_m below 100, coloured by site. Site becomes a factor so each site
# is drawn in its own discrete colour.
c <- current_df |>
  filter(
    Reservoir == "BVR",
    Site != 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Site = as.factor(Site)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Site)) +
  labs(title = "BVR CH4") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(c)
# d: BVR Site 50 CH4, observations from 2024-01-01 onward with Depth_m below
# 100, coloured by depth. Depth_m becomes a factor so each depth is drawn in
# its own discrete colour.
d <- current_df |>
  filter(
    Reservoir == "BVR",
    Site == 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CH4_umolL, colour = Depth_m)) +
  labs(title = "BVR Site 50, CH4") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(d)
# CO2 plots current year
# e: FCR sites other than Site 50, observations from 2024-01-01 onward with
# Depth_m below 100, coloured by site. Site becomes a factor so each site is
# drawn in its own discrete colour.
e <- current_df |>
  filter(
    Reservoir == "FCR",
    Site != 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Site = as.factor(Site)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Site)) +
  labs(title = "FCR CO2") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(e)
# f: FCR Site 50 CO2, observations from 2024-01-01 onward with Depth_m below
# 100, coloured by depth. Depth_m becomes a factor so each depth is drawn in
# its own discrete colour.
f <- current_df |>
  filter(
    Reservoir == "FCR",
    Site == 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Depth_m)) +
  labs(title = "FCR Site 50, CO2") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(f)
# g: BVR sites other than Site 50, CO2, observations from 2024-01-01 onward
# with Depth_m below 100, coloured by site. Site becomes a factor so each site
# is drawn in its own discrete colour.
g <- current_df |>
  filter(
    Reservoir == "BVR",
    Site != 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Site = as.factor(Site)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Site)) +
  labs(title = "BVR CO2") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(g)
# h: BVR Site 50 CO2, observations from 2024-01-01 onward with Depth_m below
# 100, coloured by depth. Depth_m becomes a factor so each depth is drawn in
# its own discrete colour.
h <- current_df |>
  filter(
    Reservoir == "BVR",
    Site == 50,
    Depth_m < 100,
    DateTime >= "2024-01-01"
  ) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(aes(DateTime, CO2_umolL, colour = Depth_m)) +
  labs(title = "BVR Site 50, CO2") +
  geom_point()

# Render the static plot as an interactive plotly widget
ggplotly(h)
# Convert datetimes to character so that they are properly formatted in the
# output file. format() already returns a character vector, so no extra
# as.character() call is needed.
current_df$DateTime <- format(current_df$DateTime)

# Need to decide on a naming convention for this file
# row.names = FALSE keeps the row index out of the csv; FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
write.csv(current_df, "ghg_2015_2024.csv", row.names = FALSE)